Project_V1

Code
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(fmsb))

data = read_excel("Project_1_Data.xlsx", sheet = "pooled123")

filteredData = select(data, PID, BSSQ_1:BSSQ_15,ASSQ_1:ASSQ_15,age, VRexperience, ssq_full)

#calculating differences between baseline and active SSQ for each symptom
filteredData = mutate(filteredData, d_discomfort = ASSQ_1 - BSSQ_1)
filteredData = mutate(filteredData, d_fatigue = ASSQ_2 - BSSQ_2)
filteredData = mutate(filteredData, d_headache = ASSQ_3 - BSSQ_3)
filteredData = mutate(filteredData, d_eyestrain = ASSQ_4 - BSSQ_4)
filteredData = mutate(filteredData, d_difficulty_focusing = ASSQ_5 - BSSQ_5)
filteredData = mutate(filteredData, d_salivation = ASSQ_6 - BSSQ_6)
filteredData = mutate(filteredData, d_sweating = ASSQ_7 - BSSQ_7)
filteredData = mutate(filteredData, d_nausea = ASSQ_8 - BSSQ_8)
filteredData = mutate(filteredData, d_difficulty_concentrating = ASSQ_9 - BSSQ_9)
filteredData = mutate(filteredData, d_fullness_of_head = ASSQ_10 - BSSQ_10)
filteredData = mutate(filteredData, d_blurred_vision = ASSQ_11 - BSSQ_11)
filteredData = mutate(filteredData, d_dizziness_o = ASSQ_12 - BSSQ_12)
filteredData = mutate(filteredData, d_dizziness_c = ASSQ_13 - BSSQ_13)
filteredData = mutate(filteredData, d_vertigo = ASSQ_14 - BSSQ_14)
filteredData = mutate(filteredData, d_stomach_awareness = ASSQ_15 - BSSQ_15)

#reclasss VR experience as factor (was chr)
filteredData$VRexperience = as.factor(filteredData$VRexperience)

#we want to filter this data even further and split it into age groups
#once in age groups, calculate the mean change for each of the age groups for each symptom

filteredData = mutate(filteredData, age_group = case_when(
  age >= 16 & age <= 21 ~ "16 to 21",
  age >= 22 & age <= 29 ~ "22 to 29",
  age >= 30 & age <= 37 ~ "30 to 37",
  age >= 38 & age <= 45 ~ "38 to 45",
  age > 45 ~ "above 45"
))

#convert the age groups into factors
filteredData$age_group = as.factor(filteredData$age_group)

#renaming columns to be more informative
names(filteredData)[names(filteredData) == 'BSSQ_1'] <- 'BSSQ_discomfort'
names(filteredData)[names(filteredData) == 'BSSQ_2'] <- 'BSSQ_fatigue'
names(filteredData)[names(filteredData) == 'BSSQ_3'] <- 'BSSQ_headache'
names(filteredData)[names(filteredData) == 'BSSQ_4'] <- 'BSSQ_eyestrain'
names(filteredData)[names(filteredData) == 'BSSQ_5'] <- 'BSSQ_difficulty_focusing'
names(filteredData)[names(filteredData) == 'BSSQ_6'] <- 'BSSQ_salivation'
names(filteredData)[names(filteredData) == 'BSSQ_7'] <- 'BSSQ_sweating'
names(filteredData)[names(filteredData) == 'BSSQ_8'] <- 'BSSQ_nausea'
names(filteredData)[names(filteredData) == 'BSSQ_9'] <- 'BSSQ_difficulty_concentrating'
names(filteredData)[names(filteredData) == 'BSSQ_10'] <- 'BSSQ_fullness_of_head'
names(filteredData)[names(filteredData) == 'BSSQ_11'] <- 'BSSQ_blurred_vision'
names(filteredData)[names(filteredData) == 'BSSQ_12'] <- 'BSSQ_dizziness_o'
names(filteredData)[names(filteredData) == 'BSSQ_13'] <- 'BSSQ_dizziness_c'
names(filteredData)[names(filteredData) == 'BSSQ_14'] <- 'BSSQ_vertigo'
names(filteredData)[names(filteredData) == 'BSSQ_15'] <- 'BSSQ_stomach_awareness'




names(filteredData)[names(filteredData) == 'ASSQ_1'] <- 'ASSQ_discomfort'
names(filteredData)[names(filteredData) == 'ASSQ_2'] <- 'ASSQ_fatigue'
names(filteredData)[names(filteredData) == 'ASSQ_3'] <- 'ASSQ_headache'
names(filteredData)[names(filteredData) == 'ASSQ_4'] <- 'ASSQ_eyestrain'
names(filteredData)[names(filteredData) == 'ASSQ_5'] <- 'ASSQ_difficulty_focusing'
names(filteredData)[names(filteredData) == 'ASSQ_6'] <- 'ASSQ_salivation'
names(filteredData)[names(filteredData) == 'ASSQ_7'] <- 'ASSQ_sweating'
names(filteredData)[names(filteredData) == 'ASSQ_8'] <- 'ASSQ_nausea'
names(filteredData)[names(filteredData) == 'ASSQ_9'] <- 'ASSQ_difficulty_concentrating'
names(filteredData)[names(filteredData) == 'ASSQ_10'] <- 'ASSQ_fullness_of_head'
names(filteredData)[names(filteredData) == 'ASSQ_11'] <- 'ASSQ_blurred_vision'
names(filteredData)[names(filteredData) == 'ASSQ_12'] <- 'ASSQ_dizziness_o'
names(filteredData)[names(filteredData) == 'ASSQ_13'] <- 'ASSQ_dizziness_c'
names(filteredData)[names(filteredData) == 'ASSQ_14'] <- 'ASSQ_vertigo'
names(filteredData)[names(filteredData) == 'ASSQ_15'] <- 'ASSQ_stomach_awareness'

#isolating the data dictionary
data_dict = read_excel("Project_1_Data.xlsx", sheet = "data_dictionary")

#filter into groups with experience or not
withVRexperience = filter(filteredData, VRexperience == 'Yes')
noVRexperience = filter(filteredData, VRexperience == 'No')

#taking averages for with/without experience
yes_avg_d_discomfort = mean(withVRexperience$d_discomfort)
yes_avg_d_fatigue = mean(withVRexperience$d_fatigue)
yes_avg_d_headache = mean(withVRexperience$d_headache)
yes_avg_d_eyestrain = mean(withVRexperience$d_eyestrain)
yes_avg_d_difficulty_focusing = mean(withVRexperience$d_difficulty_focusing)
yes_avg_d_salivation = mean(withVRexperience$d_salivation)
yes_avg_d_sweating = mean(withVRexperience$d_sweating)
yes_avg_d_nausea = mean(withVRexperience$d_nausea)
yes_avg_d_difficulty_concentrating = mean(withVRexperience$d_difficulty_concentrating)
yes_avg_d_fullness = mean(withVRexperience$d_fullness_of_head)
yes_avg_d_vision = mean(withVRexperience$d_blurred_vision)
yes_avg_d_dizziness_o = mean(withVRexperience$d_dizziness_o)
yes_avg_d_dizziness_c = mean(withVRexperience$d_dizziness_c)
yes_avg_d_vertigo = mean(withVRexperience$d_vertigo)
yes_avg_d_stomach = mean(withVRexperience$d_stomach_awareness)


no_avg_d_discomfort = mean(noVRexperience$d_discomfort)
no_avg_d_fatigue = mean(noVRexperience$d_fatigue)
no_avg_d_headache = mean(noVRexperience$d_headache)
no_avg_d_eyestrain = mean(noVRexperience$d_eyestrain)
no_avg_d_difficulty_focusing = mean(noVRexperience$d_difficulty_focusing)
no_avg_d_salivation = mean(noVRexperience$d_salivation)
no_avg_d_sweating = mean(noVRexperience$d_sweating)
no_avg_d_nausea = mean(noVRexperience$d_nausea)
no_avg_d_difficulty_concentrating = mean(noVRexperience$d_difficulty_concentrating)
no_avg_d_fullness = mean(noVRexperience$d_fullness_of_head)
no_avg_d_vision = mean(noVRexperience$d_blurred_vision)
no_avg_d_dizziness_o = mean(noVRexperience$d_dizziness_o)
no_avg_d_dizziness_c = mean(noVRexperience$d_dizziness_c)
no_avg_d_vertigo = mean(noVRexperience$d_vertigo)
no_avg_d_stomach = mean(noVRexperience$d_stomach_awareness)

#taking averages for different age groups
grp1 = filter(filteredData, age_group == '16 to 21')
grp2 = filter(filteredData, age_group == '22 to 29')
grp3 = filter(filteredData, age_group == '30 to 37')
grp4 = filter(filteredData, age_group == '38 to 45')
grp5 = filter(filteredData, age_group == 'above 45')

mean_sqq_ages = c(mean(grp1$ssq_full),
                  mean(grp2$ssq_full),
                  mean(grp3$ssq_full),
                  mean(grp4$ssq_full),
                  mean(grp5$ssq_full)
                  )

data_by_age_group = data.frame(
  age_group = c("16 to 21", '22 to 29','30 to 37','38 to 45','Above 45'),
  mean_ssq = mean_sqq_ages
  
)

Summary of Findings

(executive summary goes here)

Initial Data Analysis (IDA)

Source

Our data was sourced from Cosette Saunder’s PhD and honors thesis paper “Socially Acquired Nocebo Effects Generalize but Are Not Attenuated by Choice”. (ask about how much context we need to provide here)

Structure

The data contained 336 records of participants in the study, each with 51 variables. In particular, our project focused the following variables:

  • Baseline SSQ (BSSQ) of 16 symptoms (quantitative, discrete): self-reported symptom severity of participants before undergoing VR, on a scale of 1 to 10.

  • Active SSQ (ASSQ) of 16 symptoms (quantitative, discrete): self-reported symptom severity of participants after undergoing VR, on a scale of 1 to 10

  • The age of the participants (quantitative, discrete); they were then sorted into age groups – re-classed as ‘factor’ (qualitative, ordinal)

    • This was to allow the relationship between age groups and symptoms reported to be seen
  • Whether the participant has had VRexperience (qualitative, nominal); this was reclassified from ‘character’ into ‘factor’.

    • R misidentified this as ‘chr’, it should be a qualitative variable
  • The change (\(\Delta\)) between baseline (before) and active (after) SSQ was calculated for each participant for each symptom, and now we take the average \(\Delta\) for each symptom for each group. (quantitative, discrete)

Code
data_age_groups = select(filteredData, age_group)
groups_counted = data_age_groups %>% count(age_group)

age_pie = plot_ly(groups_counted, labels = ~age_group, values = ~n,
                 type = 'pie')
age_pie <- age_pie %>% layout(title = 'Distribution of ages',
                            showlegend = TRUE)
age_pie
Code
library(RColorBrewer)

data_experience = select(filteredData, VRexperience)
exp_counted = data_experience %>% count(VRexperience)

vr_pie = plot_ly(exp_counted, labels = ~VRexperience, values = ~n,
                 type = 'pie')
vr_pie <- vr_pie %>% layout(title = 'Distribution of VR experience',
                            showlegend = TRUE)

vr_pie

Limitations

A limitation faced with this data set is the fact that the symptoms and their severity, for both baseline and active SSQs are self-reported, meaning that the interpretation of the scale can vary from person to person, making the reported values subjective and less reliable.

Another limitation is that VR experience is simply classified by a ‘yes’ or ‘no’, which is not very descriptive of how much exposure to VR the participant has had in the past, or how recent that experience was.

Assumptions

We assumed that the participants were truthful and correct about whether or not they have participated in VR before. We also assumed that the participants followed the instructions in the symptoms survey correctly and reported their symptoms on a scale of 1 to 10, with 1 being the least severe and 10 being the most.

Code
library(plotly)

yes_means = c(yes_avg_d_discomfort,
              yes_avg_d_fatigue,
              yes_avg_d_headache,
              yes_avg_d_eyestrain,
              yes_avg_d_sweating,
              yes_avg_d_nausea,
              yes_avg_d_dizziness_c,
              yes_avg_d_dizziness_o,
              yes_avg_d_difficulty_focusing,
              yes_avg_d_difficulty_concentrating,
              yes_avg_d_salivation,
              no_avg_d_vision
              )

no_means = c(no_avg_d_discomfort,
             no_avg_d_fatigue,
             no_avg_d_headache,
             no_avg_d_eyestrain,
             no_avg_d_sweating,
             no_avg_d_nausea,
             no_avg_d_dizziness_c,
             no_avg_d_dizziness_o,
             no_avg_d_difficulty_focusing,
             no_avg_d_difficulty_concentrating,
             no_avg_d_salivation,
             no_avg_d_vision
             )

fig <- plot_ly(
  type  = 'scatterpolar',
  fill = 'toself'
)

fig <- fig %>% 
  add_trace(
    r = yes_means,
    theta = c('Discomfort', 'Fatigue', 'Headache', 'Eyestrain', 'Sweating', 'Nausea', 'Dizziness (closed)', 'Dizziness (open)', 'Difficulty Focusing', 'Difficulty Concentrating', 'Salivation', 'Blurry Vision'),
    name = 'With VR Experience'
  )

fig <- fig %>% 
  add_trace(
    r = no_means,
    theta = c('Discomfort', 'Fatigue', 'Headache', 'Eyestrain', 'Sweating', 'Nausea','Dizziness (closed)', 'Dizziness (open)', 'Difficulty Focusing', 'Difficulty Concentrating', 'Salivation', 'Blurry Vision'),
    name = 'No VR Experience'
  )

fig <- fig %>%
  layout(
    polar = list(
      radialaxis = list(
        visible = T,
        range = c(-0.5,2)
      )
    ),
    showlegend = T
  )

fig
Code
library(plotly)
library(tidyselect)

mean_sqq_ages = c(mean(grp1$ssq_full),
                  mean(grp2$ssq_full),
                  mean(grp3$ssq_full),
                  mean(grp4$ssq_full),
                  mean(grp5$ssq_full)
                  )

fig2 <- plot_ly(
  type = 'scatterpolar',
  fill = 'toself',
  r = mean_sqq_ages,
  theta = c("16 to 21",
            "22 to 29",
            "30 to 37",
            "38 to 45",
            "above 45")
)
fig2 <- fig2 %>%
  layout(
    polar = list(
      radialaxis = list(
        visible = T,
        range = c(0,20)
      )
    ),
    showlegend = T
  )

fig2
No scatterpolar mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Code
fig3 = ggplot(data_by_age_group, aes(x = age_group, y = mean_ssq)) +
  geom_bar(stat = 'identity')



ggplotly(fig3)
Code
library(tidyverse)

p = ggplot(filteredData, aes(x = age, y = ssq_full, colour = VRexperience)) +
  geom_point()

p

Code
plt = ggplot(filteredData, aes(x = VRexperience, y = ssq_full)) +
  geom_boxplot()

ggplotly(plt)
Code
plt2 = ggplot(filteredData, aes(x = age_group, y = ssq_full)) +
  geom_boxplot()

ggplotly(plt2)